R code for lecture 9

# analysis of covariance where focus is on categorical variable
 
ipo <- read.delim('ecol 563/ipomopsis.txt')
ipo[1:10,]
out0 <- lm(Fruit~Grazing, data=ipo)
# significant grazing effect
anova(out0)
# grazing has positive effect
summary(out0)
# t-test with separate variances
t.test(Fruit ~ Grazing, data = ipo)
# t-test with pooled variances = ANOVA
t.test(Fruit ~ Grazing, data = ipo, var.equal=T)
# grazing has positve effect
boxplot(Fruit~Grazing, data=ipo)
 
# display covariate--root size
plot(Fruit~Root, data=ipo, col=as.numeric(Grazing))
 
# change response to ratio
out0a <- lm(I(Fruit/Root)~Grazing, data=ipo)
# grazing effect is not significant
anova(out0a)
# grazing has negative effect but not significant
summary(out0a)
 
# analysis of covariance model
out1 <- lm(Fruit~Grazing+Root, data=ipo)
anova(out1)
# put root first so control for root size
out1 <- lm(Fruit~Root+Grazing, data=ipo)
anova(out1)
# now grazing has a significant negative effect
summary(out1)
 
# model paramters
coef(out1)
# add regression model to graph with curve function
curve(coef(out1)[1] + coef(out1)[2]*x, lty=2, add=T)
curve(coef(out1)[1] + coef(out1)[3] + coef(out1)[2]*x, lty=2, add=T, col=2)
 
# examine interaction model
out2 <- lm(Fruit~Root*Grazing, data=ipo)
# test whether slopes are the same
anova(out2)
summary(out2)
 
# add interaction model to plot
curve(coef(out2)[1] + coef(out2)[2]*x, lty=2, add=T, col='grey70', lwd=2)
curve(coef(out2)[1] + coef(out2)[3] + coef(out2)[2]*x + coef(out2)[4]*x, lty=2, add=T, col=4, lwd=2)
 
## analysis of covariance model where focus is on continuous variable
 
goby <- read.delim('ecol 563/goby.txt')
goby[1:10,]
plot(mortality~density, data=goby, col=refuge)
 
# separate slopes model
mod1 <- lm(mortality~density*factor(refuge), data=goby)
coef(mod1)
anova(mod1)
# slopes in refuge 1 different from refuges 2 and 3
summary(mod1)
 
# refit model with 3 as the reference group
mod1a <- lm(mortality~density*factor(refuge,levels=3:1), data=goby)
# slope for refuge 2 is different from slope for refuge 1
summary(mod1a)
 
# add separate lines to scatter plot
abline(lm(mortality~density, data=goby[goby$refuge==1,]), col=1, lty=2)
abline(lm(mortality~density, data=goby[goby$refuge==2,]), col=2, lty=2)
abline(lm(mortality~density, data=goby[goby$refuge==3,]), col=3, lty=2)
 
# reparameterize model so we get estimates of slopes and intercepts for each refuge
mod1a <- lm(mortality~density:factor(refuge)+factor(refuge)-1, data=goby)
summary(mod1a)
 
# the two models are the same
# same MSE
summary(mod1a)$sigma
summary(mod1)$sigma
 
# same AIC and log-likelihood
AIC(mod1,mod1a)
 
# parameterized this way we can use confint to get confidence intervals for the mean
confint(mod1a)
 
# function to construct vector of regressors for use in model
myvec<-function(r,x) c(r==1, r==2, r==3, x*(r==1), x*(r==2), x*(r==3))
# choose one of the densities for refuge 1
goby$density[goby$refuge==1]
 
# values for regressors in for the three refuge values
mydat <- data.frame(r=1:3, x=1.125)
mydat
 
# create contrast matrix
cmat <- apply(mydat, 1, function(x) myvec(x[1], x[2]))
cmat
# C matrix needs to be transposed
cmat <- t(apply(mydat,1, function(x) myvec(x[1], x[2])))
cmat
# calculate means for each refuge when x = 1.125
cmat%*%coef(mod1a)
# variance-covariance matrix of means for this choice of x
vmat1 <- cmat%*%vcov(mod1a)%*%t(cmat)
vmat1
 
# function to calculate difference-adjusted confidence intervals
 ci.func <- function(rowvals, lm.model, glm.vmat) {
nor.func1a <- function(alpha, model, sig) 1-pt(-qt(1-alpha/2, model$df.residual) * sum(sqrt(diag(sig))) / sqrt(c(1,-1) %*% sig %*%c (1,-1)), model$df.residual) - pt(qt(1-alpha/2, model$df.residual) * sum(sqrt(diag(sig))) / sqrt(c(1,-1) %*% sig %*% c(1,-1)), model$df.residual, lower.tail=F)
nor.func2 <- function(a,model,sigma) nor.func1a(a,model,sigma)-.95
n <- length(rowvals)
xvec1b <- numeric(n*(n-1)/2)
vmat <- glm.vmat[rowvals,rowvals]
ind <- 1
for(i in 1:(n-1)) {
for(j in (i+1):n){
sig <- vmat[c(i,j), c(i,j)]
#solve for alpha
xvec1b[ind] <- uniroot(function(x) nor.func2(x, lm.model, sig), c(.001,.999))$root
ind <- ind+1
}}
1-xvec1b
}
 
# obtain confidence levels for comparing three means when x=1.125
ci.func(1:3, mod1a, vmat1)
 
# organize the calculations as a function of density
my.ci <- function(x) {
mydat <- data.frame(r=1:3,x=x)
cmat <- t(apply(mydat,1, function(x) myvec(x[1],x[2])))
vmat1 <- cmat%*%vcov(mod1a)%*%t(cmat)
ci.func(1:3, mod1a, vmat1)}
 
# test the function
my.ci(1.125)
 
# obtain the difference-adjusted confidence levels for comparing the 3 means at each possible density
sort(unique(goby$density))
ci.vals <- sapply(sort(unique(goby$density)),my.ci)
dim(ci.vals)
ci.vals
 
# values range from 0.85 to 0.87
max(ci.vals)
min(ci.vals)
 
# plot regression lines with 95% and difference-adjusted confidence intervals for mean
 
# set up graph
plot(mortality~density, data=goby, ylim=c(0,4), type='n')
# select data for refuge 1
cur.x <- sort(unique(goby$density[goby$refuge==1]))
# create matrix of regressors
my.c <- t(sapply(cur.x, function(x) myvec(1,x)))
my.c
# calculate means for those values of the regressors
cur.y <- my.c %*% coef(mod1a)
cur.y
# standard errors of the means
cur.se <- sqrt(diag(my.c %*% vcov(mod1a) %*% t(my.c)))
# add regression lines
lines(cur.x, cur.y, col=2)
# 95% confidence intervals
segments(cur.x, cur.y+qt(.025, mod1a$df.residual)*cur.se, cur.x, cur.y+qt(.975,mod1a$df.residual)*cur.se, col=2)
# difference-adjusted confidence intervals
segments(cur.x, cur.y+qt((1-.86)/2, mod1a$df.residual)*cur.se, cur.x, cur.y+qt(1-(1-.86)/2,mod1a$df.residual)*cur.se, col='pink2',lwd=3)
 
# organize everything as a function of refuge
 
plot.func <- function(r, col1, col2){
cur.x<-sort(unique(goby$density[goby$refuge==r]))
 my.c <- t(sapply(cur.x, function(x) myvec(r,x)))
cur.y <- my.c %*% coef(mod1a)
cur.se <- sqrt(diag(my.c %*% vcov(mod1a) %*% t(my.c)))
 lines(cur.x, cur.y, col=col1)
 segments(cur.x, cur.y+qt(.025, mod1a$df.residual)*cur.se, cur.x, cur.y+qt(.975, mod1a$df.residual)*cur.se, col=col1)
 segments(cur.x, cur.y+qt((1-.86)/2, mod1a$df.residual)*cur.se, cur.x, cur.y+qt(1-(1-.86)/2, mod1a$df.residual)*cur.se, col=col2, lwd=3)
}
 
# plot regression lines and confidence intervals
plot(mortality~density, data=goby, ylim=c(0,4), type='n')
plot.func(1, 2, 'pink2')
plot.func(2, 'dodgerblue4', 'dodgerblue1')
plot.func(3, 'seagreen4', 'seagreen3')
R Code for Lecture 9 (Monday, September 24, 2012)